# Author: Kyle Schirmann
# Date: December 7, 2023

library(stargazer)
library(readxl)
library(dplyr)
library(lubridate)
library(sandwich)
library(htmlTable)
library(magrittr)
library(ggplot2)
library(MASS)
library(kableExtra)
library(forcats)

rm(list=ls())

# NOTE: To generate the "_UNKNOWNS" tables as a robustness check:
# 1) Change REQUIRE_ALL_DATA = FALSE
# 2) Update OUTPUT_TABLE_2_PATH and OUTPUT_TABLE_2_INTERACT_PATH to a different filename (with _unk at the end, before the extension)
# 3) In this line: df_Overall_Summary_Stat <- df_omnibus %>%
#    DELETE the married filter line and ADD the following line:
#    filter(!is.na(perform_avg)) %>%
# 4) Update the LaTeX labels in these stargazer outputs accordingly, as well as the footnotes

#options(error=browser)

##### SET THESE CONSTANTS
PATH_WORKING_DIRECTORY = "~/Documents/Research/brac/dataverse"
PATH_OUTPUT_FOLDER_NAME = "out"

TABLE_OUTPUT_TEX = TRUE
REQUIRE_ALL_DATA = TRUE
SE_TYPE = "HC1"

PATH_TO_EMPLOYEE_SURVEY =    paste0("./", "employee_survey.xlsx")
PATH_TO_MANAGER_SURVEY  =    paste0("./", "manager_survey.xlsx")
PATH_TO_ATTENDANCE_RECORDS = paste0("./", "employee_attendance_records.xlsx")
PATH_TO_ATTRITION_DATA     = paste0("./", "attrition_data.xlsx")

OUTPUT_FIGURE_1B_PATH =      paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "figure_1B.pdf")
OUTPUT_FIGURE_D1_PATH =      paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "figure_D1.pdf")

OUTPUT_TABLE_1_PATH =              paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "selfrating_ols.", ifelse(TABLE_OUTPUT_TEX, "tex", "htm"))
OUTPUT_TABLE_1_INTERACT_PATH =     paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "selfrating_ols_interact.", ifelse(TABLE_OUTPUT_TEX, "tex", "htm"))
OUTPUT_TABLE_1_OP_PATH =           paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "selfrating_oprobit.", ifelse(TABLE_OUTPUT_TEX, "tex", "htm"))
OUTPUT_TABLE_1_OP_INTERACT_PATH =  paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "selfrating_oprobit_interact.", ifelse(TABLE_OUTPUT_TEX, "tex", "htm"))
OUTPUT_TABLE_1_NC_PATH =           paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "selfrating_ols_nc.", ifelse(TABLE_OUTPUT_TEX, "tex", "htm"))
OUTPUT_TABLE_1_NC_INTERACT_PATH =  paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "selfrating_ols_nc_interact.", ifelse(TABLE_OUTPUT_TEX, "tex", "htm"))

OUTPUT_TABLE_2_PATH =             paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "mgreval_ols.", ifelse(TABLE_OUTPUT_TEX, "tex", "htm"))
OUTPUT_TABLE_2_INTERACT_PATH =    paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "mgreval_ols_interact.", ifelse(TABLE_OUTPUT_TEX, "tex", "htm"))
OUTPUT_TABLE_2_OP_PATH =          paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "mgreval_oprobit.", ifelse(TABLE_OUTPUT_TEX, "tex", "htm"))
OUTPUT_TABLE_2_OP_INTERACT_PATH = paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "mgreval_oprobit_interact.", ifelse(TABLE_OUTPUT_TEX, "tex", "htm"))
OUTPUT_TABLE_2_NC_PATH =          paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "mgreval_ols_nc.", ifelse(TABLE_OUTPUT_TEX, "tex", "htm"))
OUTPUT_TABLE_2_NC_INTERACT_PATH = paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "mgreval_ols_nc_interact.", ifelse(TABLE_OUTPUT_TEX, "tex", "htm"))

OUTPUT_TABLE_C2_PATH =            paste0("./", PATH_OUTPUT_FOLDER_NAME, "/", "balance_table.", ifelse(TABLE_OUTPUT_TEX, "tex", "htm"))
##### END CONSTANTS

setwd(PATH_WORKING_DIRECTORY)
dir.create(file.path(PATH_WORKING_DIRECTORY, PATH_OUTPUT_FOLDER_NAME), showWarnings = FALSE)
message("#### Data Import")
message(paste("Working directory is ", PATH_WORKING_DIRECTORY))
rm(PATH_WORKING_DIRECTORY, PATH_OUTPUT_FOLDER_NAME)

message("Note: If there are duplicate reports for the same worker in any file, only the most recently submitted is preserved.")
get_employee_survey_data <- function(fname) {
  
  df <- read_excel(fname, skip = 1)
  
  names(df)[grep("^Staff ID:.*Number 1$",colnames(df))] <- "pin_digit_1"
  names(df)[grep("^Staff ID:.*Number 2$",colnames(df))] <- "pin_digit_2"
  names(df)[grep("^Staff ID:.*Number 3$",colnames(df))] <- "pin_digit_3"
  names(df)[grep("^Staff ID:.*Number 4$",colnames(df))] <- "pin_digit_4"
  names(df)[grep("^Staff ID:.*Number 5$",colnames(df))] <- "pin_digit_5"
  names(df)[grep("^Staff ID:.*Number 6$",colnames(df))] <- "pin_digit_6"
  
  df$pin <- paste0(ifelse(is.na(df$pin_digit_1), '', df$pin_digit_1),
                   ifelse(is.na(df$pin_digit_2), '', df$pin_digit_2),
                   ifelse(is.na(df$pin_digit_3), '', df$pin_digit_3),
                   ifelse(is.na(df$pin_digit_4), '', df$pin_digit_4),
                   ifelse(is.na(df$pin_digit_5), '', df$pin_digit_5),
                   ifelse(is.na(df$pin_digit_6), '', df$pin_digit_6))
  
  df <- df %>%
    filter(df$pin != paste(rep("0",6), collapse=''))
  df <- df %>%
    filter(df$pin != "")
  df <- df %>%
    filter(df$`Distribution Channel` != "preview")
  df$pin <- as.numeric(as.character(df$pin))
  
  df$married <- ifelse(df$`Are you married?` == "Yes", 1, 0)
  df$masters_phd <- ifelse(df$`Highest Educational Degree` == "Masters Degree", 1, ifelse(df$`Highest Educational Degree` == "PhD", 1, 0))
  df$spouse_wfh <- ifelse(df$`Is your spouse working from home?` == "Yes", 1, 0)
  df$care_children <- ifelse(df$`Do you have to take care of your children while working from home?` == "Yes", 1, 0)
  df$anxious <- ifelse(df$`Do you feel anxious about your health when working in the office?` == "Yes", 1, 0)
  
  df_survey_newcol <- function(inp_df, rx, new_col_name) {
    cidx <- grep(rx, colnames(inp_df))
    inp_df[,ncol(inp_df) + 1] <- inp_df[,cidx]
    names(inp_df)[ncol(inp_df)] = new_col_name
    inp_df[which(inp_df[,ncol(inp_df)] == "Strongly Agree\r\n7\r\n"), ncol(inp_df)] <- "7"
    inp_df[which(inp_df[,ncol(inp_df)] == "4\r\n"), ncol(inp_df)] <- "4"
    inp_df[which(inp_df[,ncol(inp_df)] == "Strongly Disagree\r\n1\r\n"), ncol(inp_df)] <- "1"
    inp_df[,ncol(inp_df)] <- apply(inp_df[,ncol(inp_df)], 2,
                                          function(x) as.numeric(as.character(x)))
    return(inp_df)
  }
  
  df <- df_survey_newcol(df, ".*a.) I feel left out*", "feel_left_out") # Feel Left Out
  df <- df_survey_newcol(df, ".*b.) I miss out on*", "miss_mentorship") # Miss Mentorship
  df <- df_survey_newcol(df, ".*c.) I feel out of*", "feel_out_of_loop") # Feel Out of the Loop
  df <- df_survey_newcol(df, ".*d.) I miss face-to-face*", "miss_f2f") # Miss F2F w/Coworkers
  df <- df_survey_newcol(df, ".*e.) I feel isolated*", "feel_isolated") # Feel Isolated
  df <- df_survey_newcol(df, ".*f.) I miss the emotional support*", "miss_emotional") # Miss Coworker Emotional Support
  df <- df_survey_newcol(df, ".*g.) I miss informal interactions*", "miss_informal")  # Miss Informal Interactions
  
  df <- df_survey_newcol(df, ".*a) The time I spend on family*", "wfh_family_interferes_during_oh") # Family Interferes Work During OH
  df <- df_survey_newcol(df, ".*b) The time I spend on family*", "wfh_family_interferes_after_oh") # Family Interferes Work After OH
  df <- df_survey_newcol(df, ".*c) The time I spend with my family*", "wfh_family_prevents_pd") # Family Prevents Prof Dev
  df <- df_survey_newcol(df, ".*d) It becomes*", "wfh_family_work_stress") # Family Causes Work Stress
  
  df <- df_survey_newcol(df, ".*a)\tOverall, I am satisfied*", "wfh_satisfied") # Satisfied w/WFH
  df <- df_survey_newcol(df, ".*b) Working from home allows*", "wfh_works_better_home") # Works better at home
  df <- df_survey_newcol(df, ".*c) If I were now given*", "wfh_unlikely_return") # Unlikely to choose to go back to office
  df <- df_survey_newcol(df, ".*d)\tSince I*", "wfh_better_wlb") # WLB
  df <- df_survey_newcol(df, ".*e)\tSince I*", "wfh_productivity_inc") # Productivity Increased WFH
  
  
  # Keep only the most recent survey, if the employee filled it out multiple times
  df <- df %>%
    group_by(pin) %>%
    slice(which.max(`End Date`))
  
  ncolidx = which(colnames(df) == "pin")
  df <- df %>%
    dplyr::select(ncolidx:last_col()) %>%
    data.frame()
  
  return(df)
}
message(paste0("Importing employee survey from ", PATH_TO_EMPLOYEE_SURVEY))
df_employee_survey <- get_employee_survey_data(PATH_TO_EMPLOYEE_SURVEY)
rm(get_employee_survey_data, PATH_TO_EMPLOYEE_SURVEY)
message(sprintf("... found %d unique workers", nrow(df_employee_survey)))

get_manager_survey_data <- function(fname, standardize_performance = FALSE) {
  df <- read_excel(fname, sheet = "SheetEnhanced", skip = 1)
  
  df <- subset(df, emp_pin != -1)
  
  df_survey_newcol_mgr <- function(inp_df, rx, new_col_name) {
    cidx <- grep(rx, colnames(inp_df))
    inp_df[,ncol(inp_df) + 1] <- inp_df[,cidx]
    names(inp_df)[ncol(inp_df)] = new_col_name
    inp_df[which(inp_df[,ncol(inp_df)] == "Excellent\r\n7\r\n"), ncol(inp_df)] <- "7"
    inp_df[,ncol(inp_df)] <- apply(inp_df[,ncol(inp_df)], 2,
                                   function(x) as.numeric(as.character(x)))
    return(inp_df)
  }
  
  df <- df_survey_newcol_mgr(df, ".*Creativity$", "perform_creativity")
  df <- df_survey_newcol_mgr(df, ".*Cooperation$", "perform_coop")
  df <- df_survey_newcol_mgr(df, ".*Job Knowledge$", "perform_knowledge")
  df <- df_survey_newcol_mgr(df, ".*Productivity$", "perform_productivity")
  df <- df_survey_newcol_mgr(df, ".*Quality of Work$", "perform_quality")
  df <- df_survey_newcol_mgr(df, ".*Ability$", "perform_ability")
  df$perform_avg <- 1/6 * (df$perform_ability +
                           df$perform_coop +
                           df$perform_knowledge +
                           df$perform_creativity +
                           df$perform_productivity +
                           df$perform_quality)
  
  # Keep only the LAST submission per PIN
  df <- df %>%
  group_by(emp_pin) %>%
    filter(row_number() == n()) %>%
    data.frame()
  
  ncolidx = which(colnames(df) == "emp_pin")
  df <- df %>%
    dplyr::select((ncolidx):last_col()) %>%
    data.frame()
  
  colnames(df)[colnames(df) == "emp_pin"] ="pin"
  
  if(standardize_performance) {
    df$perform_ability <- scale(df$perform_ability, center = TRUE, scale = TRUE)
    df$perform_coop <- scale(df$perform_coop, center = TRUE, scale = TRUE)
    df$perform_knowledge <- scale(df$perform_knowledge, center = TRUE, scale = TRUE)
    df$perform_creativity <- scale(df$perform_creativity, center = TRUE, scale = TRUE)
    df$perform_productivity <- scale(df$perform_productivity, center = TRUE, scale = TRUE)
    df$perform_quality <- scale(df$perform_quality, center = TRUE, scale = TRUE)
    df$perform_avg <- scale(df$perform_avg, center = TRUE, scale = TRUE)
  }
  
  return(df)
}
message(paste0("Importing manager-assessed performance from ", PATH_TO_MANAGER_SURVEY))
df_manager_survey <- get_manager_survey_data(PATH_TO_MANAGER_SURVEY, standardize_performance = FALSE)
rm(get_manager_survey_data, PATH_TO_MANAGER_SURVEY)
message(sprintf("... found %d unique workers", nrow(df_manager_survey)))

message(paste0("Importing attendance from ", PATH_TO_ATTENDANCE_RECORDS))
get_attendance_data <- function(fname) {
  df <- read_excel(PATH_TO_ATTENDANCE_RECORDS)

  colnames(df)[colnames(df) == "PIN"] = "pin"
  colnames(df)[colnames(df) == "Gender"] = "gender"
  colnames(df)[colnames(df) == "Department"] = "department"
  
  df$days_in_office <- 0
  attendance_col_indices <- grep("^Attendance_", colnames(df))
  for(idx in attendance_col_indices) {
    nmz <- as.numeric(ifelse(df[,idx] == "WORKING_IN_OFFICE", 1, ifelse(df[,idx] == "WORKING_IN_OFFICE_EXCEPTION", 1, 0)))
    df$days_in_office <- df$days_in_office + nmz
  }
  
  df <- dplyr::select(df, c("pin", "department", "gender", "days_in_office"))
  
  return(df)
}
df_attendance_data <- get_attendance_data(PATH_TO_ATTENDANCE_RECORDS)
rm(get_attendance_data, PATH_TO_ATTENDANCE_RECORDS)
message(sprintf("... found %d unique workers", nrow(df_attendance_data)))
message("")

# We start from the attendance data, and then link to the others.
message("### Merging records")
message("Attendance + Employee Survey -> Omnibus")
df_omnibus <- merge(df_attendance_data, df_employee_survey, by="pin", all.x = TRUE)
rm(df_employee_survey)
message("Omnibus + Manager Survey -> Omnibus")
df_omnibus <- merge(df_omnibus, df_manager_survey, by="pin", all.x = TRUE)
rm(df_manager_survey)

df_omnibus$is_manager <- ifelse(df_omnibus$pin %in% c(), 1, ifelse(df_omnibus$pin %in% c(), 0, df_omnibus$is_manager)) #NOTE: we manually adjust manager status in four cases here (IDs omitted for privacy)

message("Note: Attendance data is used as the base")
message(sprintf("    %d employees in attendance data",  nrow(df_attendance_data)))
#message(sprintf("  -  %d employee(s) not found in worker survey data", sum(is.na(df_omnibus$married))))
#message(sprintf("  -   %d employee(s) not found in manager survey data", sum(is.na(df_omnibus$work_avg))))
#message(sprintf("  +   %d employee(s) not found in either (so double-counted above)", sum(is.na(df_omnibus$work_avg) & is.na(df_omnibus$married))))
#message("   ----")
rm(df_attendance_data)
fcount <- table(df_omnibus$department)
message(sprintf("    %d employees with full data for analysis (%d HR, %d microfinance), including NA/UNKNOWN codes",
                nrow(df_omnibus), fcount["HR"], fcount["Microfinance"]))
rm(fcount)

if(REQUIRE_ALL_DATA) {
  df_omnibus <- subset(df_omnibus, care_children != "UNKNOWN") # employee survey
  df_omnibus <- subset(df_omnibus, !is.na(perform_avg))        # manager survey
  message(sprintf("All data required, so using %d with full data", nrow(df_omnibus)))
}

message("")

cutpoints <- quantile(df_omnibus$days_in_office, c(1/3, 2/3, 1))

message("Three-Bin Distribution")
message(sprintf("High WFH = 0 - %d days in office", cutpoints[1]))
message(sprintf("Intermediate WFH = %d - %d days in office", cutpoints[1] + 1, cutpoints[2]))
message(sprintf("Low WFH = %d - %d days in office", cutpoints[2] + 1, cutpoints[3]))
message("")

df_omnibus$wfh_category <- as.factor(ifelse(df_omnibus$days_in_office <= cutpoints[1], "High WFH", ifelse(df_omnibus$days_in_office <= cutpoints[2], "Intermediate WFH", "Low WFH")))

rm(cutpoints)

df_attrition <- read_excel(PATH_TO_ATTRITION_DATA, sheet = "Original (Manual + Emails)")
df_attrition <- df_attrition[c("pin", "retained")]
df_attrition <- merge(df_omnibus, df_attrition, by="pin", all.x = TRUE)
df_attrition <- df_attrition[c("pin", "retained", "department", "wfh_category")]
df_attrition1 <- df_attrition %>%
  subset(department == "HR") %>%
  group_by(wfh_category) %>%
  summarize(retained_pct = mean(retained))

message("Attrition data")
print(df_attrition1) 

df_attrition2 <- df_attrition %>%
  subset(department == "HR")

df_attrition2$retained_str <- ifelse(df_attrition2$retained == 1, "retained", "not retained")

df_attrition2_tbl <- table(df_attrition2$wfh_category, df_attrition2$retained_str)

attritionChiSq <- chisq.test(df_attrition2_tbl)
print(attritionChiSq)

rm(df_attrition, df_attrition1, df_attrition2, df_attrition2_tbl, attritionChiSq, PATH_TO_ATTRITION_DATA)

df_omnibus$department <- as.factor(df_omnibus$department)
df_omnibus$gender <- as.factor(df_omnibus$gender)

df_omnibus$gender_male_indicator <- ifelse(df_omnibus$gender == "M", 1, 0)
df_Overall_Summary_Stat <- df_omnibus %>%
  filter(married != "UNKNOWN") %>%
  summarize(male_mean = mean(gender_male_indicator),
            male_sd = sd(gender_male_indicator),
            mgr_mean = mean(is_manager),
            mgr_sd = sd(is_manager),
            postgrad_mean = mean(masters_phd),
            postgrad_sd = sd(masters_phd),
            married_mean = mean(married),
            married_sd = sd(married),
            spouseWFH_mean = mean(spouse_wfh),
            spouseWFH_sd = sd(spouse_wfh),
            care_children_mean = mean(care_children),
            care_children_sd = sd(care_children),
            wfh_satisfied_mean = mean(wfh_satisfied),
            wfh_satisfied_sd = sd(wfh_satisfied),
            wfh_better_wlb_mean = mean(wfh_better_wlb),
            wfh_better_wlb_sd = sd(wfh_better_wlb),
            wfh_unlikely_return_mean = mean(wfh_unlikely_return),
            wfh_unlikely_return_sd = sd(wfh_unlikely_return),
            miss_f2f_mean = mean(miss_f2f),
            miss_f2f_sd = sd(miss_f2f),
            miss_mentorship_mean = mean(miss_mentorship),
            miss_mentorship_sd = sd(miss_mentorship),
            feel_isolated_mean = mean(feel_isolated),
            feel_isolated_sd = sd(feel_isolated),
            perform_coop_mean = mean(perform_coop),
            perform_coop_sd = sd(perform_coop),
            perform_quality_mean = mean(perform_quality),
            perform_quality_sd = sd(perform_quality),
            perform_avg_mean = mean(perform_avg),
            perform_avg_sd = sd(perform_avg),
            perform_ability_mean = mean(perform_ability),
            perform_ability_sd = sd(perform_ability),
            perform_creativity_mean = mean(perform_creativity),
            perform_creativity_sd = sd(perform_creativity),
            perform_knowledge_mean = mean(perform_knowledge),
            perform_knowledge_sd = sd(perform_knowledge),
            perform_productivity_mean = mean(perform_productivity),
            perform_productivity_sd = sd(perform_productivity),
            obs = n())

message(paste0("Printing data for Table C.1"))
df_C1 <- df_omnibus %>%
  filter(married != "UNKNOWN") %>%
  summarize(male_mean = mean(gender_male_indicator),
            male_sd = sd(gender_male_indicator),
            mgr_mean = mean(is_manager),
            mgr_sd = sd(is_manager),
            postgrad_mean = mean(masters_phd),
            postgrad_sd = sd(masters_phd),
            married_mean = mean(married),
            married_sd = sd(married),
            spouseWFH_mean = mean(spouse_wfh),
            spouseWFH_sd = sd(spouse_wfh),
            care_children_mean = mean(care_children),
            care_children_sd = sd(care_children),
            obs = n())

print(df_C1)
rm(df_C1)
message("... done")


message(paste0("Generating Table C.2 -> ", OUTPUT_TABLE_C2_PATH))

df_A1 <- df_omnibus %>%
  group_by(wfh_category) %>%
  filter(married != "UNKNOWN") %>%
  summarize(male_mean = mean(gender_male_indicator),
            male_sd = sd(gender_male_indicator),
            mgr_mean = mean(is_manager),
            mgr_sd = sd(is_manager),
            postgrad_mean = mean(masters_phd),
            postgrad_sd = sd(masters_phd),
            married_mean = mean(married),
            married_sd = sd(married),
            spouseWFH_mean = mean(spouse_wfh),
            spouseWFH_sd = sd(spouse_wfh),
            care_children_mean = mean(care_children),
            care_children_sd = sd(care_children),
            obs = n())

if(TABLE_OUTPUT_TEX) {

A1_latex_table <- data.frame(c1=c("", "Male", "Manager", "Postgraduate", "Married", "Spouse WFH", "Caring for Child"),
                             c2=c("Mean", sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$male_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$mgr_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$postgrad_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$married_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$spouseWFH_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$care_children_mean[1])),
                             c3=c("SD", sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$male_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$mgr_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$postgrad_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$married_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$spouseWFH_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$care_children_sd[1])),
                             c4=c("Mean", sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$male_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$mgr_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$postgrad_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$married_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$spouseWFH_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$care_children_mean[1])),
                             c5=c("SD", sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$male_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$mgr_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$postgrad_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$married_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$spouseWFH_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$care_children_sd[1])),
                             c6=c("Mean", sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$male_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$mgr_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$postgrad_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$married_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$spouseWFH_mean[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$care_children_mean[1])),
                             c7=c("SD", sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$male_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$mgr_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$postgrad_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$married_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$spouseWFH_sd[1]),
                                 sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$care_children_sd[1])))

n1 <- sprintf("High WFH\n(n = %d)", subset(df_A1, wfh_category == "High WFH")$obs[1])
n2 <- sprintf("Intermediate WFH\n(n = %d)", subset(df_A1, wfh_category == "Intermediate WFH")$obs[1])
n3 <- sprintf("Low WFH\n(n = %d)", subset(df_A1, wfh_category == "Low WFH")$obs[1])

hrow <- c(1, 2, 2, 2)
names(hrow) <- c(" ", n1, n2, n3)

kbl(A1_latex_table, format = "latex", label = "balance", col.names = NULL, caption = "Covariate Balance After Randomization", booktabs = T) %>%
  add_header_above(hrow) %>%
  save_kable(OUTPUT_TABLE_C2_PATH, float = FALSE)

fl <- readLines(OUTPUT_TABLE_C2_PATH)
fl <- fl[!fl == "\\addlinespace"]
fl[1] <- "\\begin{table}[!htbp]"
fl[7] <- paste("\\multicolumn{1}{c}{ } & \\multicolumn{2}{c}{\\shortstack{High WFH \\\\ (n = ", subset(df_A1, wfh_category == "High WFH")$obs[1], ")}} & \\multicolumn{2}{c}{\\shortstack{Intermediate WFH \\\\ (n = ", subset(df_A1, wfh_category == "Intermediate WFH")$obs[1], ")}} & \\multicolumn{2}{c}{\\shortstack{Low WFH \\\\ (n = ", subset(df_A1, wfh_category == "Low WFH")$obs[1],")}} \\\\", sep = "")
writeLines(fl, con = OUTPUT_TABLE_C2_PATH)
rm(fl)

rm(n1, n2, n3, hrow)

rm(A1_latex_table)

} else {

A1_tbl <- htmlTable(matrix(c(sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$male_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$mgr_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$postgrad_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$married_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$spouseWFH_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$care_children_mean[1]),
                             sprintf("%d", subset(df_A1, wfh_category == "High WFH")$obs[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$male_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$mgr_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$postgrad_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$married_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$spouseWFH_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "High WFH")$care_children_sd[1]),
                             "<nbsp/>",
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$male_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$mgr_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$postgrad_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$married_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$spouseWFH_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$care_children_mean[1]),
                             sprintf("%d", subset(df_A1, wfh_category == "Intermediate WFH")$obs[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$male_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$mgr_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$postgrad_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$married_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$spouseWFH_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Intermediate WFH")$care_children_sd[1]),
                             "<nbsp/>",
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$male_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$mgr_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$postgrad_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$married_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$spouseWFH_mean[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$care_children_mean[1]),
                             sprintf("%d", subset(df_A1, wfh_category == "Low WFH")$obs[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$male_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$mgr_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$postgrad_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$married_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$spouseWFH_sd[1]),
                             sprintf("%0.2f", subset(df_A1, wfh_category == "Low WFH")$care_children_sd[1]),
                             "<nbsp/>"),
                           ncol = 6,
                           dimnames = list(rows = c("Male", "Manager", "Postgraduate", "Married", "Spouse WFH", "Caring for Child", "Observations"),
                                           cols = c("Mean", "SD","Mean", "SD","Mean", "SD"))),
                    cgroup = c("High WFH", "Intermediate WFH", "Low WFH"), n.cgroup = c(2,2,2), total = TRUE)
write(A1_tbl, OUTPUT_TABLE_C2_PATH)
rm(A1_tbl)
}
#print(A1_tbl, useViewer = TRUE)
rm(df_A1, OUTPUT_TABLE_C2_PATH)
message("... done")

message("ANOVA: gender_male_indicator")
print(summary(aov(gender_male_indicator ~ wfh_category, data = df_omnibus)))
message("")
message("ANOVA: is_manager")
print(summary(aov(is_manager ~ wfh_category, data = df_omnibus)))
message("")
message("ANOVA: masters_phd")
print(summary(aov(masters_phd ~ wfh_category, data = df_omnibus)))
message("")
message("ANOVA: spouse_wfh")
print(summary(aov(spouse_wfh ~ wfh_category, data = df_omnibus)))
message("")
message("ANOVA: care_children")
print(summary(aov(care_children ~ wfh_category, data = df_omnibus)))

df_omnibus$married <- as.factor(ifelse(is.na(df_omnibus$married), "UNKNOWN", ifelse(df_omnibus$married == 1, "YES", "NO")))
df_omnibus$postgrad <- as.factor(ifelse(is.na(df_omnibus$masters_phd), "UNKNOWN", ifelse(df_omnibus$masters_phd == 1, "YES", "NO")))
df_omnibus$spouse_wfh <- as.factor(ifelse(is.na(df_omnibus$spouse_wfh), "UNKNOWN", ifelse(df_omnibus$spouse_wfh == 1, "YES", "NO")))
df_omnibus$care_children <- as.factor(ifelse(is.na(df_omnibus$care_children), "UNKNOWN", ifelse(df_omnibus$care_children == 1, "YES", "NO")))
df_omnibus$anxious <- as.factor(ifelse(is.na(df_omnibus$anxious), "UNKNOWN", ifelse(df_omnibus$anxious == 1, "YES", "NO")))
df_omnibus$department <- as.factor(df_omnibus$department) # all valid
df_omnibus$gender <- as.factor(df_omnibus$gender) # all valid

v_wfhIntermediate_gender <- subset(df_omnibus, wfh_category == "Intermediate WFH")$gender_male_indicator
v_wfhHigh_gender <- subset(df_omnibus, wfh_category == "High WFH")$gender_male_indicator
v_wfhIntermediate_mgr <- subset(df_omnibus, wfh_category == "Intermediate WFH")$is_manager
v_wfhHigh_mgr <- subset(df_omnibus, wfh_category == "High WFH")$is_manager

t.test(v_wfhIntermediate_gender, v_wfhHigh_gender, alternative = "two.sided", var.equal = FALSE)
t.test(v_wfhIntermediate_mgr, v_wfhHigh_mgr, alternative = "two.sided", var.equal = FALSE)

rm(v_wfhIntermediate_gender, v_wfhHigh_gender,v_wfhIntermediate_mgr, v_wfhHigh_mgr)

get_reg_formula_with_controls <- function(dv, as_factor = FALSE, interaction = FALSE) {
  controls <- " + gender + department + postgrad + married + spouse_wfh + care_children"
  spec <- NULL
  if(as_factor && !interaction) {
    spec <- paste("factor(", dv, ") ~ wfh_category", controls, " + is_manager")
  } else if(!as_factor && interaction) {
    spec <- paste(dv, " ~ wfh_category*is_manager", controls)
  } else if(as_factor && interaction) {
    spec <- paste("factor(", dv, ") ~ wfh_category*is_manager", controls)
  } else {
    spec <- paste(dv, " ~ wfh_category", controls, "+ is_manager")
  }
  
  #message("Specification: ", spec)
  spec <- as.formula(spec)
  return(spec)
}

tabname <- function(path, prefix = "tab") {
  fname <- tail(strsplit(path, "/")[[1]], n = 1)
  fname <- head(strsplit(fname, "\\.")[[1]], n = 1)
  return(paste0(prefix, ":", fname))
}

HEADERS_TABLE_1 <- c('\\shortstack{Job \\\\ Satisfaction}',
                     '\\shortstack{Work-Life \\\\ Balance}',
                     '\\shortstack{Prefer \\\\ WFH}',
                     '\\shortstack{Miss \\\\ Face-to-Face}',
                     '\\shortstack{Miss \\\\ Mentorship}',
                     '\\shortstack{Feel \\\\ Isolated}')
HEADERS_TABLE_2 <- c("Ability",
                     "Cooperation",
                     "Creativity",
                     "Knowledge",
                     "Productivity",
                     "'\\shortstack{Quality \\\\ of Work}'",
                     "'\\shortstack{Overall \\\\ Performance}'")

SUMMARY_ROW_TABLE_1 <- list(c("d.v. mean", sprintf("%.2f", df_Overall_Summary_Stat$wfh_satisfied_mean),
                                           sprintf("%.2f", df_Overall_Summary_Stat$wfh_better_wlb_mean),
                                           sprintf("%.2f", df_Overall_Summary_Stat$wfh_unlikely_return_mean),
                                           sprintf("%.2f", df_Overall_Summary_Stat$miss_f2f_mean),
                                           sprintf("%.2f", df_Overall_Summary_Stat$miss_mentorship_mean),
                                           sprintf("%.2f", df_Overall_Summary_Stat$feel_isolated_mean)),
                         c("d.v. std. dev.", sprintf("%.2f", df_Overall_Summary_Stat$wfh_satisfied_sd),
                                             sprintf("%.2f", df_Overall_Summary_Stat$wfh_better_wlb_sd),
                                             sprintf("%.2f", df_Overall_Summary_Stat$wfh_unlikely_return_sd),
                                             sprintf("%.2f", df_Overall_Summary_Stat$miss_f2f_sd),
                                             sprintf("%.2f", df_Overall_Summary_Stat$miss_mentorship_sd),
                                             sprintf("%.2f", df_Overall_Summary_Stat$feel_isolated_sd)))
SUMMARY_ROW_TABLE_2 <- list(c("d.v. mean", sprintf("%.2f", df_Overall_Summary_Stat$perform_ability_mean),
                              sprintf("%.2f", df_Overall_Summary_Stat$perform_coop_mean),
                              sprintf("%.2f", df_Overall_Summary_Stat$perform_creativity_mean),
                              sprintf("%.2f", df_Overall_Summary_Stat$perform_knowledge_mean),
                              sprintf("%.2f", df_Overall_Summary_Stat$perform_productivity_mean),
                              sprintf("%.2f", df_Overall_Summary_Stat$perform_quality_mean),
                              sprintf("%.2f", df_Overall_Summary_Stat$perform_avg_mean)),
                            c("d.v. std. dev.", sprintf("%.2f", df_Overall_Summary_Stat$perform_ability_sd),
                              sprintf("%.2f", df_Overall_Summary_Stat$perform_coop_sd),
                              sprintf("%.2f", df_Overall_Summary_Stat$perform_creativity_sd),
                              sprintf("%.2f", df_Overall_Summary_Stat$perform_knowledge_sd),
                              sprintf("%.2f", df_Overall_Summary_Stat$perform_productivity_sd),
                              sprintf("%.2f", df_Overall_Summary_Stat$perform_quality_sd),
                              sprintf("%.2f", df_Overall_Summary_Stat$perform_avg_sd)))

noconst <- function(inp) {
  return(inp[-length(inp)])
}

NO_INTERACTION_COVARIATE_LABELS <- c("Intermediate WFH", "Low WFH", "Male", "Postgrad", "Married", "Spouse WFH", "Cares for Child", "Is Manager", "Microfinance", "Constant")
NO_INTERACTION_COVARIATE_KEEP <- c("wfh_categoryIntermediate WFH", "wfh_categoryLow WFH", "genderM", "postgradYES", "marriedYES", "spouse_wfhYES", "care_childrenYES", "is_manager", "departmentMicrofinance", "Constant")
INTERACTION_COVARIATE_LABELS <- c("Intermediate WFH", "$\\times$ Is Manager", "Low WFH", "$\\times$ Is Manager", "Male", "Postgrad", "Married", "Spouse WFH", "Cares for Child", "Is Manager", "Microfinance", "Constant")
INTERACTION_COVARIATE_KEEP <- c("wfh_categoryIntermediate WFH", "wfh_categoryIntermediate WFH:is_manager", "wfh_categoryLow WFH", "wfh_categoryLow WFH:is_manager", "genderM", "postgradYES", "marriedYES", "spouse_wfhYES", "care_childrenYES", "is_manager", "departmentMicrofinance", "Constant")

message(paste0("Generating Table 1 -> ", OUTPUT_TABLE_1_PATH))

t1c1 <- lm(get_reg_formula_with_controls("wfh_satisfied"), df_omnibus)
t1c1_se <- sqrt(diag(vcovHC(t1c1, type=SE_TYPE)))
t1c2 <- lm(get_reg_formula_with_controls("wfh_better_wlb"), df_omnibus)
t1c2_se <- sqrt(diag(vcovHC(t1c2, type=SE_TYPE)))
t1c3 <- lm(get_reg_formula_with_controls("wfh_unlikely_return"), df_omnibus)
t1c3_se <- sqrt(diag(vcovHC(t1c3, type=SE_TYPE)))
t1c4 <- lm(get_reg_formula_with_controls("miss_f2f"), df_omnibus)
t1c4_se <- sqrt(diag(vcovHC(t1c4, type=SE_TYPE)))
t1c5 <- lm(get_reg_formula_with_controls("miss_mentorship"), df_omnibus)
t1c5_se <- sqrt(diag(vcovHC(t1c5, type=SE_TYPE)))
t1c6 <- lm(get_reg_formula_with_controls("feel_isolated"), df_omnibus)
t1c6_se <- sqrt(diag(vcovHC(t1c6, type=SE_TYPE)))

sink(nullfile())
stargazer(t1c1,
          t1c2,
          t1c3,
          t1c4,
          t1c5,
          t1c6,
          se = c(list(t1c1_se),
                 list(t1c2_se),
                 list(t1c3_se),
                 list(t1c4_se),
                 list(t1c5_se),
                 list(t1c6_se)),
          dep.var.labels = HEADERS_TABLE_1,
          title = "Intensity of WFH and Employee Attitudes",
          style = "qje",
          dep.var.caption = "",
          type = ifelse(TABLE_OUTPUT_TEX, "latex", "html"),
          order = NO_INTERACTION_COVARIATE_KEEP,
          keep = NO_INTERACTION_COVARIATE_KEEP,
          covariate.labels = NO_INTERACTION_COVARIATE_LABELS,
          header = FALSE,
          no.space = TRUE,
          omit.stat = c("f", "adj.rsq", "ser"),
          add.lines = SUMMARY_ROW_TABLE_1,
          label = tabname(OUTPUT_TABLE_1_PATH),
          out = OUTPUT_TABLE_1_PATH)
sink()

rm(OUTPUT_TABLE_1_PATH)
message("... done")


message(paste0("Generating Figure D.1 -> ", OUTPUT_FIGURE_D1_PATH))

extract_coeff <- function(coeff_name, reg, SEs, stat) {
  if(stat == "LOW_ERROR_BAR" || stat == "HIGH_ERROR_BAR") {
    cf = reg$coefficients[coeff_name]
    se = SEs[coeff_name]
    oneninesix = qnorm(.975)
    if(stat == "LOW_ERROR_BAR") {
      return(cf - oneninesix * se)
    } else {
      return(cf + oneninesix * se)
    }
  } else{
    if(stat == "MIDPT") {
      return(reg$coefficients[coeff_name])
    }
  }
}

df_f1a_low_wfh <- data.frame(midpt = c(extract_coeff("wfh_categoryLow WFH", t1c1, t1c1_se, "MIDPT"),
                                       extract_coeff("wfh_categoryLow WFH", t1c2, t1c2_se, "MIDPT"),
                                       extract_coeff("wfh_categoryLow WFH", t1c3, t1c3_se, "MIDPT"),
                                       extract_coeff("wfh_categoryLow WFH", t1c4, t1c4_se, "MIDPT"),
                                       extract_coeff("wfh_categoryLow WFH", t1c5, t1c5_se, "MIDPT"),
                                       extract_coeff("wfh_categoryLow WFH", t1c6, t1c6_se, "MIDPT")),
                             low_error_bar = c(extract_coeff("wfh_categoryLow WFH", t1c1, t1c1_se, "LOW_ERROR_BAR"),
                                               extract_coeff("wfh_categoryLow WFH", t1c2, t1c2_se, "LOW_ERROR_BAR"),
                                               extract_coeff("wfh_categoryLow WFH", t1c3, t1c3_se, "LOW_ERROR_BAR"),
                                               extract_coeff("wfh_categoryLow WFH", t1c4, t1c4_se, "LOW_ERROR_BAR"),
                                               extract_coeff("wfh_categoryLow WFH", t1c5, t1c5_se, "LOW_ERROR_BAR"),
                                               extract_coeff("wfh_categoryLow WFH", t1c6, t1c6_se, "LOW_ERROR_BAR")),
                             high_error_bar = c(extract_coeff("wfh_categoryLow WFH", t1c1, t1c1_se, "HIGH_ERROR_BAR"),
                                                extract_coeff("wfh_categoryLow WFH", t1c2, t1c2_se, "HIGH_ERROR_BAR"),
                                                extract_coeff("wfh_categoryLow WFH", t1c3, t1c3_se, "HIGH_ERROR_BAR"),
                                                extract_coeff("wfh_categoryLow WFH", t1c4, t1c4_se, "HIGH_ERROR_BAR"),
                                                extract_coeff("wfh_categoryLow WFH", t1c5, t1c5_se, "HIGH_ERROR_BAR"),
                                                extract_coeff("wfh_categoryLow WFH", t1c6, t1c6_se, "HIGH_ERROR_BAR")),
                             label = c("Job\nSatisfaction",
                                       "Work-Life\nBalance",
                                       "Prefer\nWFH",
                                       "Miss\nFace-to-Face",
                                       "Miss\nMentorship",
                                       "Feel\nIsolated"))

df_f1a_inm_wfh <- data.frame(midpt = c(extract_coeff("wfh_categoryIntermediate WFH", t1c1, t1c1_se, "MIDPT"),
                                       extract_coeff("wfh_categoryIntermediate WFH", t1c2, t1c2_se, "MIDPT"),
                                       extract_coeff("wfh_categoryIntermediate WFH", t1c3, t1c3_se, "MIDPT"),
                                       extract_coeff("wfh_categoryIntermediate WFH", t1c4, t1c4_se, "MIDPT"),
                                       extract_coeff("wfh_categoryIntermediate WFH", t1c5, t1c5_se, "MIDPT"),
                                       extract_coeff("wfh_categoryIntermediate WFH", t1c6, t1c6_se, "MIDPT")),
                             low_error_bar = c(extract_coeff("wfh_categoryIntermediate WFH", t1c1, t1c1_se, "LOW_ERROR_BAR"),
                                               extract_coeff("wfh_categoryIntermediate WFH", t1c2, t1c2_se, "LOW_ERROR_BAR"),
                                               extract_coeff("wfh_categoryIntermediate WFH", t1c3, t1c3_se, "LOW_ERROR_BAR"),
                                               extract_coeff("wfh_categoryIntermediate WFH", t1c4, t1c4_se, "LOW_ERROR_BAR"),
                                               extract_coeff("wfh_categoryIntermediate WFH", t1c5, t1c5_se, "LOW_ERROR_BAR"),
                                               extract_coeff("wfh_categoryIntermediate WFH", t1c6, t1c6_se, "LOW_ERROR_BAR")),
                             high_error_bar = c(extract_coeff("wfh_categoryIntermediate WFH", t1c1, t1c1_se, "HIGH_ERROR_BAR"),
                                                extract_coeff("wfh_categoryIntermediate WFH", t1c2, t1c2_se, "HIGH_ERROR_BAR"),
                                                extract_coeff("wfh_categoryIntermediate WFH", t1c3, t1c3_se, "HIGH_ERROR_BAR"),
                                                extract_coeff("wfh_categoryIntermediate WFH", t1c4, t1c4_se, "HIGH_ERROR_BAR"),
                                                extract_coeff("wfh_categoryIntermediate WFH", t1c5, t1c5_se, "HIGH_ERROR_BAR"),
                                                extract_coeff("wfh_categoryIntermediate WFH", t1c6, t1c6_se, "HIGH_ERROR_BAR")),
                             label = c("Job\nSatisfaction",
                                       "Work-Life\nBalance",
                                       "Prefer\nWFH",
                                       "Miss\nFace-to-Face",
                                       "Miss\nMentorship",
                                       "Feel\nIsolated"))

df_f1a_low_wfh$series <- "Low WFH"
df_f1a_inm_wfh$series <- "Intermediate WFH"

df_f1a <- rbind(df_f1a_low_wfh, df_f1a_inm_wfh)

rm(df_f1a_low_wfh, df_f1a_inm_wfh)

ggplot(df_f1a, aes(x=fct_inorder(label),
                   y=midpt,
                   fill=series)) +
  geom_col(position = "dodge2", color = "black") +
  geom_errorbar(aes(ymin = low_error_bar,
                    ymax = high_error_bar),
                position=position_dodge2(width = 0.5, padding = 0.5)) +
  ylab("Coefficient (Relative to High WFH)") +
  xlab("Dimension") +
  theme_bw() +
  scale_fill_grey(start = .8, end = 1) +
  theme(legend.title = element_blank(), legend.position = "bottom", panel.grid = element_blank()) +
  geom_vline(xintercept = 1 + seq(0.5, length(df_f1a), by = 1), color="gray", size=.5, alpha=.5) +
  geom_hline(yintercept = c(-1, 0, 1), color="gray", size=.5, alpha=.5)
ggsave(OUTPUT_FIGURE_D1_PATH, width = 6, height = 4, units = "in")

rm(OUTPUT_FIGURE_D1_PATH, df_f1a)
rm(t1c1, t1c2, t1c3, t1c4, t1c5, t1c6, t1c1_se, t1c2_se, t1c3_se, t1c4_se, t1c5_se, t1c6_se)

message("... done")

message(paste0("Generating Table 1 but without controls (Table F.1) -> ", OUTPUT_TABLE_1_NC_PATH))

t1c1 <- lm(wfh_satisfied ~ wfh_category, df_omnibus)
t1c1_se <- sqrt(diag(vcovHC(t1c1, type=SE_TYPE)))
t1c2 <- lm(wfh_better_wlb ~ wfh_category, df_omnibus)
t1c2_se <- sqrt(diag(vcovHC(t1c2, type=SE_TYPE)))
t1c3 <- lm(wfh_unlikely_return ~ wfh_category, df_omnibus)
t1c3_se <- sqrt(diag(vcovHC(t1c3, type=SE_TYPE)))
t1c4 <- lm(miss_f2f ~ wfh_category, df_omnibus)
t1c4_se <- sqrt(diag(vcovHC(t1c4, type=SE_TYPE)))
t1c5 <- lm(miss_mentorship ~ wfh_category, df_omnibus)
t1c5_se <- sqrt(diag(vcovHC(t1c5, type=SE_TYPE)))
t1c6 <- lm(feel_isolated ~ wfh_category, df_omnibus)
t1c6_se <- sqrt(diag(vcovHC(t1c6, type=SE_TYPE)))

sink(nullfile())
stargazer(t1c1,
          t1c2,
          t1c3,
          t1c4,
          t1c5,
          t1c6,
          se = c(list(t1c1_se),
                 list(t1c2_se),
                 list(t1c3_se),
                 list(t1c4_se),
                 list(t1c5_se),
                 list(t1c6_se)),
          dep.var.labels = HEADERS_TABLE_1,
          title = "Intensity of WFH and Employee Attitudes (No Controls)",
          style = "qje",
          dep.var.caption = "",
          type = ifelse(TABLE_OUTPUT_TEX, "latex", "html"),
          order = c("wfh_categoryIntermediate WFH", "wfh_categoryLow WFH", "Constant"),
          keep = c("wfh_categoryIntermediate WFH", "wfh_categoryLow WFH", "Constant"),
          covariate.labels = c("Intermediate WFH", "Low WFH", "Constant"),
          header = FALSE,
          no.space = TRUE,
          omit.stat = c("f", "adj.rsq", "ser"),
          add.lines=append(list(c("Controls", rep("No", 6))), SUMMARY_ROW_TABLE_1),
          label = tabname(OUTPUT_TABLE_1_NC_PATH),
          out = OUTPUT_TABLE_1_NC_PATH)
sink()

rm(OUTPUT_TABLE_1_NC_PATH)
rm(t1c1, t1c2, t1c3, t1c4, t1c5, t1c6)
rm(t1c1_se, t1c2_se, t1c3_se, t1c4_se, t1c5_se, t1c6_se)

message("... done")


message(paste0("Generating Table 1 with interactions but without controls (Table F.2) -> ", OUTPUT_TABLE_1_NC_INTERACT_PATH))

t1c1 <- lm(wfh_satisfied ~ wfh_category*is_manager, df_omnibus)
t1c1_se <- sqrt(diag(vcovHC(t1c1, type=SE_TYPE)))
t1c2 <- lm(wfh_better_wlb ~ wfh_category*is_manager, df_omnibus)
t1c2_se <- sqrt(diag(vcovHC(t1c2, type=SE_TYPE)))
t1c3 <- lm(wfh_unlikely_return ~ wfh_category*is_manager, df_omnibus)
t1c3_se <- sqrt(diag(vcovHC(t1c3, type=SE_TYPE)))
t1c4 <- lm(miss_f2f ~ wfh_category*is_manager, df_omnibus)
t1c4_se <- sqrt(diag(vcovHC(t1c4, type=SE_TYPE)))
t1c5 <- lm(miss_mentorship ~ wfh_category*is_manager, df_omnibus)
t1c5_se <- sqrt(diag(vcovHC(t1c5, type=SE_TYPE)))
t1c6 <- lm(feel_isolated ~ wfh_category*is_manager, df_omnibus)
t1c6_se <- sqrt(diag(vcovHC(t1c6, type=SE_TYPE)))

sink(nullfile())
stargazer(t1c1,
          t1c2,
          t1c3,
          t1c4,
          t1c5,
          t1c6,
          se = c(list(t1c1_se),
                 list(t1c2_se),
                 list(t1c3_se),
                 list(t1c4_se),
                 list(t1c5_se),
                 list(t1c6_se)),
          dep.var.labels = HEADERS_TABLE_1,
          title = "Intensity of WFH and Employee Attitudes (No Controls)",
          style = "qje",
          dep.var.caption = "",
          type = ifelse(TABLE_OUTPUT_TEX, "latex", "html"),
         order = c("wfh_categoryIntermediate WFH", "wfh_categoryIntermediate WFH:is_manager", "wfh_categoryLow WFH", "wfh_categoryLow WFH:is_manager", "is_manager", "Constant"),
     keep = c("wfh_categoryIntermediate WFH", "wfh_categoryIntermediate WFH:is_manager", "wfh_categoryLow WFH", "wfh_categoryLow WFH:is_manager", "is_manager", "Constant"),
     covariate.labels = c("Intermediate WFH", "$\\times$ Is Manager", "Low WFH", "$\\times$ Is Manager", "Is Manager", "Constant"),
          header = FALSE,
          no.space = TRUE,
          omit.stat = c("f", "adj.rsq", "ser"),
     add.lines=append(list(c("Other Controls", rep("No", 6))), SUMMARY_ROW_TABLE_1),
     label = tabname(OUTPUT_TABLE_1_NC_INTERACT_PATH),
          out = OUTPUT_TABLE_1_NC_INTERACT_PATH)
sink()

rm(OUTPUT_TABLE_1_NC_INTERACT_PATH)
rm(t1c1, t1c2, t1c3, t1c4, t1c5, t1c6)
rm(t1c1_se, t1c2_se, t1c3_se, t1c4_se, t1c5_se, t1c6_se)

message("... done")

message(paste0("Generating Table 1 with interactions (Table B.1) -> ", OUTPUT_TABLE_1_INTERACT_PATH, ")"))

t1c1i <- lm(get_reg_formula_with_controls("wfh_satisfied", as_factor = FALSE, interaction = TRUE), df_omnibus)
t1c1i_se <- sqrt(diag(vcovHC(t1c1i, type=SE_TYPE)))
t1c2i <- lm(get_reg_formula_with_controls("wfh_better_wlb", as_factor = FALSE, interaction = TRUE), df_omnibus)
t1c2i_se <- sqrt(diag(vcovHC(t1c2i, type=SE_TYPE)))
t1c3i <- lm(get_reg_formula_with_controls("wfh_unlikely_return", as_factor = FALSE, interaction = TRUE), df_omnibus)
t1c3i_se <- sqrt(diag(vcovHC(t1c3i, type=SE_TYPE)))
t1c4i <- lm(get_reg_formula_with_controls("miss_f2f", as_factor = FALSE, interaction = TRUE), df_omnibus)
t1c4i_se <- sqrt(diag(vcovHC(t1c4i, type=SE_TYPE)))
t1c5i <- lm(get_reg_formula_with_controls("miss_mentorship", as_factor = FALSE, interaction = TRUE), df_omnibus)
t1c5i_se <- sqrt(diag(vcovHC(t1c5i, type=SE_TYPE)))
t1c6i <- lm(get_reg_formula_with_controls("feel_isolated", as_factor = FALSE, interaction = TRUE), df_omnibus)
t1c6i_se <- sqrt(diag(vcovHC(t1c6i, type=SE_TYPE)))
sink(nullfile())
stargazer(t1c1i,
          t1c2i,
          t1c3i,
          t1c4i,
          t1c5i,
          t1c6i,
          se = c(list(t1c1i_se),
                 list(t1c2i_se),
                 list(t1c3i_se),
                 list(t1c4i_se),
                 list(t1c5i_se),
                 list(t1c6i_se)),
          dep.var.labels = HEADERS_TABLE_1,
          title = "Intensity of WFH and Employee Attitudes",
          style = "qje",
          dep.var.caption = "",
          type = ifelse(TABLE_OUTPUT_TEX, "latex", "html"),
          order = INTERACTION_COVARIATE_KEEP,
          keep = INTERACTION_COVARIATE_KEEP,
          covariate.labels = INTERACTION_COVARIATE_LABELS,
          header = FALSE,
          no.space = TRUE,
          omit.stat = c("f", "adj.rsq", "ser"),
          add.lines = SUMMARY_ROW_TABLE_1,
          label = tabname(OUTPUT_TABLE_1_INTERACT_PATH),
          out = OUTPUT_TABLE_1_INTERACT_PATH)
sink()

rm(t1c1i, t1c2i, t1c3i, t1c4i, t1c5i, t1c6i, t1c1i_se, t1c2i_se, t1c3i_se, t1c4i_se, t1c5i_se, t1c6i_se)
rm(OUTPUT_TABLE_1_INTERACT_PATH)
message("... done")

message(paste0("Generating Table 1 as ordered probit (Table E.1) -> ", OUTPUT_TABLE_1_OP_PATH))

t1opc1 <- polr(get_reg_formula_with_controls("wfh_satisfied", as_factor = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t1opc2 <- polr(get_reg_formula_with_controls("wfh_better_wlb", as_factor = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t1opc3 <- polr(get_reg_formula_with_controls("wfh_unlikely_return", as_factor = TRUE), df_omnibus,method = "probit", Hess = TRUE, model = TRUE)
t1opc4 <- polr(get_reg_formula_with_controls("miss_f2f", as_factor = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t1opc5 <- polr(get_reg_formula_with_controls("miss_mentorship", as_factor = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t1opc6 <- polr(get_reg_formula_with_controls("feel_isolated", as_factor = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)

t1opc1_se <- sqrt(diag(vcovCL(t1opc1)))
t1opc2_se <- sqrt(diag(vcovCL(t1opc2)))
t1opc3_se <- sqrt(diag(vcovCL(t1opc3)))
t1opc4_se <- sqrt(diag(vcovCL(t1opc4)))
t1opc5_se <- sqrt(diag(vcovCL(t1opc5)))
t1opc6_se <- sqrt(diag(vcovCL(t1opc6)))

sink(nullfile())
stargazer(t1opc1,
          t1opc2,
          t1opc3,
          t1opc4,
          t1opc5,
          t1opc6,
          se = c(list(t1opc1_se,
                      t1opc2_se,
                      t1opc3_se,
                      t1opc4_se,
                      t1opc5_se,
                      t1opc6_se)),
          dep.var.labels = HEADERS_TABLE_1,
          title = "Intensity of WFH and Employee Attitudes (Ordered Probit)",
          style = "qje",
          dep.var.caption = "",
          type = ifelse(TABLE_OUTPUT_TEX, "latex", "html"),
          order = noconst(NO_INTERACTION_COVARIATE_KEEP),
          keep = noconst(NO_INTERACTION_COVARIATE_KEEP),
          covariate.labels = noconst(NO_INTERACTION_COVARIATE_LABELS),
          header = FALSE,
          model.names = FALSE,
          no.space = TRUE,
          omit.stat = c("f", "adj.rsq", "ser"),
          add.lines = SUMMARY_ROW_TABLE_1,
          label = tabname(OUTPUT_TABLE_1_OP_PATH),
          out = OUTPUT_TABLE_1_OP_PATH)
sink()

rm(t1opc1, t1opc2, t1opc3, t1opc4, t1opc5, t1opc6)
rm(t1opc1_se, t1opc2_se, t1opc3_se, t1opc4_se, t1opc5_se, t1opc6_se)
rm(OUTPUT_TABLE_1_OP_PATH)

message("... done")

message(paste0("Generating Table 1 with interactions as ordered probit (Table E.2) -> ", OUTPUT_TABLE_1_OP_INTERACT_PATH))

t1opc1i <- polr(get_reg_formula_with_controls("wfh_satisfied", as_factor = TRUE, interact = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t1opc2i <- polr(get_reg_formula_with_controls("wfh_better_wlb", as_factor = TRUE, interact = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t1opc3i <- polr(get_reg_formula_with_controls("wfh_unlikely_return", as_factor = TRUE, interact = TRUE), df_omnibus,method = "probit", Hess = TRUE, model = TRUE)
t1opc4i <- polr(get_reg_formula_with_controls("miss_f2f", as_factor = TRUE, interact = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t1opc5i <- polr(get_reg_formula_with_controls("miss_mentorship", as_factor = TRUE, interact = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t1opc6i <- polr(get_reg_formula_with_controls("feel_isolated", as_factor = TRUE, interact = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)

t1opc1i_se <- sqrt(diag(vcovCL(t1opc1i)))
t1opc2i_se <- sqrt(diag(vcovCL(t1opc2i)))
t1opc3i_se <- sqrt(diag(vcovCL(t1opc3i)))
t1opc4i_se <- sqrt(diag(vcovCL(t1opc4i)))
t1opc5i_se <- sqrt(diag(vcovCL(t1opc5i)))
t1opc6i_se <- sqrt(diag(vcovCL(t1opc6i)))

sink(nullfile())
stargazer(t1opc1i,
          t1opc2i,
          t1opc3i,
          t1opc4i,
          t1opc5i,
          t1opc6i,
          se = c(list(t1opc1i_se,
                      t1opc2i_se,
                      t1opc3i_se,
                      t1opc4i_se,
                      t1opc5i_se,
                      t1opc6i_se)),
          dep.var.labels = c('\\shortstack{Job \\\\ Satisfaction}',
                             '\\shortstack{Work-Life \\\\ Balance}',
                             '\\shortstack{Prefer \\\\ WFH}',
                             '\\shortstack{Miss \\\\ Face-to-Face}',
                             '\\shortstack{Miss \\\\ Mentorship}',
                             '\\shortstack{Feel \\\\ Isolated}'),
          title = "Intensity of WFH and Employee Attitudes (Ordered Probit)",
          style = "qje",
          dep.var.caption = "",
          type = ifelse(TABLE_OUTPUT_TEX, "latex", "html"),
          order = noconst(INTERACTION_COVARIATE_KEEP),
          keep = noconst(INTERACTION_COVARIATE_KEEP),
          covariate.labels = noconst(INTERACTION_COVARIATE_LABELS),
          header = FALSE,
          model.names = FALSE,
          no.space = TRUE,
          omit.stat = c("f", "adj.rsq", "ser"),
          add.lines=SUMMARY_ROW_TABLE_1,
          label = tabname(OUTPUT_TABLE_1_OP_INTERACT_PATH),
          out = OUTPUT_TABLE_1_OP_INTERACT_PATH)
sink()

rm(t1opc1i, t1opc2i, t1opc3i, t1opc4i, t1opc5i, t1opc6i)
rm(t1opc1i_se, t1opc2i_se, t1opc3i_se, t1opc4i_se, t1opc5i_se, t1opc6i_se)
rm(OUTPUT_TABLE_1_OP_INTERACT_PATH)
message("... done")

message(paste0("Generating Table 2 -> ", OUTPUT_TABLE_2_PATH))

t2c1a <- lm(get_reg_formula_with_controls("perform_coop", as_factor = FALSE, interaction = FALSE), df_omnibus)
t2c1a_se <- sqrt(diag(vcovHC(t2c1a, type=SE_TYPE)))
t2c1b <- lm(get_reg_formula_with_controls("perform_coop", as_factor = FALSE, interaction = TRUE), df_omnibus)
t2c1b_se <- sqrt(diag(vcovHC(t2c1b, type=SE_TYPE)))
t2c2a <- lm(get_reg_formula_with_controls("perform_quality", as_factor = FALSE, interaction = FALSE), df_omnibus)
t2c2a_se <- sqrt(diag(vcovHC(t2c2a, type=SE_TYPE)))
t2c2b <- lm(get_reg_formula_with_controls("perform_quality", as_factor = FALSE, interaction = TRUE), df_omnibus)
t2c2b_se <- sqrt(diag(vcovHC(t2c2b, type=SE_TYPE)))
t2c3a <- lm(get_reg_formula_with_controls("perform_avg", as_factor = FALSE, interaction = FALSE), df_omnibus)
t2c3a_se <- sqrt(diag(vcovHC(t2c3a, type=SE_TYPE)))
t2c3b <- lm(get_reg_formula_with_controls("perform_avg", as_factor = FALSE, interaction = TRUE), df_omnibus)
t2c3b_se <- sqrt(diag(vcovHC(t2c3b, type=SE_TYPE)))

t2c4a <- lm(get_reg_formula_with_controls("perform_ability", as_factor = FALSE, interaction = FALSE), df_omnibus)
t2c4a_se <- sqrt(diag(vcovHC(t2c4a, type=SE_TYPE)))
t2c4b <- lm(get_reg_formula_with_controls("perform_ability", as_factor = FALSE, interaction = TRUE), df_omnibus)
t2c4b_se <- sqrt(diag(vcovHC(t2c4b, type=SE_TYPE)))
t2c5a <- lm(get_reg_formula_with_controls("perform_creativity", as_factor = FALSE, interaction = FALSE), df_omnibus)
t2c5a_se <- sqrt(diag(vcovHC(t2c5a, type=SE_TYPE)))
t2c5b <- lm(get_reg_formula_with_controls("perform_creativity", as_factor = FALSE, interaction = TRUE), df_omnibus)
t2c5b_se <- sqrt(diag(vcovHC(t2c5b, type=SE_TYPE)))
t2c6a <- lm(get_reg_formula_with_controls("perform_knowledge", as_factor = FALSE, interaction = FALSE), df_omnibus)
t2c6a_se <- sqrt(diag(vcovHC(t2c6a, type=SE_TYPE)))
t2c6b <- lm(get_reg_formula_with_controls("perform_knowledge", as_factor = FALSE, interaction = TRUE), df_omnibus)
t2c6b_se <- sqrt(diag(vcovHC(t2c6b, type=SE_TYPE)))
t2c7a <- lm(get_reg_formula_with_controls("perform_productivity", as_factor = FALSE, interaction = FALSE), df_omnibus)
t2c7a_se <- sqrt(diag(vcovHC(t2c7a, type=SE_TYPE)))
t2c7b <- lm(get_reg_formula_with_controls("perform_productivity", as_factor = FALSE, interaction = TRUE), df_omnibus)
t2c7b_se <- sqrt(diag(vcovHC(t2c7b, type=SE_TYPE)))

sink(nullfile())
stargazer(t2c4a,
          t2c1a,
          t2c5a,
          t2c6a,
          t2c7a,
          t2c2a,
          t2c3a,
          se = c(list(t2c4a_se),
                 list(t2c1a_se),
                 list(t2c5a_se),
                 list(t2c6a_se),
                 list(t2c7a_se),
                 list(t2c2a_se),
                 list(t2c3a_se)),
          title = "Intensity of WFH and Manager-Assigned Performance Ratings",
          style = "qje",
          dep.var.labels = HEADERS_TABLE_2,
          type = ifelse(TABLE_OUTPUT_TEX, "latex", "html"),
          order = NO_INTERACTION_COVARIATE_KEEP,
          keep = NO_INTERACTION_COVARIATE_KEEP,
          covariate.labels = NO_INTERACTION_COVARIATE_LABELS,
          header = FALSE,
          no.space = TRUE,
          omit.stat = c("f", "adj.rsq", "ser"),
          add.lines = SUMMARY_ROW_TABLE_2,
          # add.lines=list(c("Controls", rep("Yes", 7))),
          label = tabname(OUTPUT_TABLE_2_PATH),
          out = OUTPUT_TABLE_2_PATH)
sink()

rm(OUTPUT_TABLE_2_PATH)
rm(t2c1a, t2c2a, t2c3a, t2c4a, t2c5a, t2c6a, t2c7a)
rm(t2c1a_se, t2c2a_se, t2c3a_se, t2c4a_se, t2c5a_se, t2c6a_se, t2c7a_se)

message("... done")

message(paste0("Generating Table 2 with interactions (Table B.2) -> ", OUTPUT_TABLE_2_INTERACT_PATH, ")"))

sink(nullfile())
stargazer(t2c4b,
          t2c1b,
          t2c5b,
          t2c6b,
          t2c7b,
          t2c2b,
          t2c3b,
          se = c(list(t2c4b_se),
                 list(t2c1b_se),
                 list(t2c5b_se),
                 list(t2c6b_se),
                 list(t2c7b_se),
                 list(t2c2b_se),
                 list(t2c3b_se)),
          title = "Intensity of WFH and Manager-Assigned Performance Ratings",
          style = "qje",
          dep.var.labels = HEADERS_TABLE_2,
          type = ifelse(TABLE_OUTPUT_TEX, "latex", "html"),
          order = INTERACTION_COVARIATE_KEEP,
          keep = INTERACTION_COVARIATE_KEEP,
          covariate.labels = INTERACTION_COVARIATE_LABELS,
          header = FALSE,
          no.space = TRUE,
          omit.stat = c("f", "adj.rsq", "ser"),
          add.lines=SUMMARY_ROW_TABLE_2,
          label = tabname(OUTPUT_TABLE_2_INTERACT_PATH),
          out = OUTPUT_TABLE_2_INTERACT_PATH)
sink()

rm(OUTPUT_TABLE_2_INTERACT_PATH)
rm(t2c1b, t2c2b, t2c3b, t2c4b, t2c5b, t2c6b, t2c7b)
rm(t2c1b_se, t2c2b_se, t2c3b_se, t2c4b_se, t2c5b_se, t2c6b_se, t2c7b_se)

message("... done")

message(paste0("Generating Table 2 but without controls (Table F.3) -> ", OUTPUT_TABLE_2_NC_PATH))

t2nc1 <- lm(perform_creativity ~ wfh_category, df_omnibus)
t2nc1_se <- sqrt(diag(vcovHC(t2nc1, type=SE_TYPE)))
t2nc2 <- lm(perform_quality ~ wfh_category, df_omnibus)
t2nc2_se <- sqrt(diag(vcovHC(t2nc2, type=SE_TYPE)))
t2nc3 <- lm(perform_ability ~ wfh_category, df_omnibus)
t2nc3_se <- sqrt(diag(vcovHC(t2nc3, type=SE_TYPE)))
t2nc4 <- lm(perform_coop ~ wfh_category, df_omnibus)
t2nc4_se <- sqrt(diag(vcovHC(t2nc4, type=SE_TYPE)))
t2nc5 <- lm(perform_knowledge ~ wfh_category, df_omnibus)
t2nc5_se <- sqrt(diag(vcovHC(t2nc5, type=SE_TYPE)))
t2nc6 <- lm(perform_productivity ~ wfh_category, df_omnibus)
t2nc6_se <- sqrt(diag(vcovHC(t2nc6, type=SE_TYPE)))
t2nc7 <- lm(perform_avg ~ wfh_category, df_omnibus)
t2nc7_se <- sqrt(diag(vcovHC(t2nc7, type=SE_TYPE)))

sink(nullfile())
stargazer(t2nc3,
          t2nc4,
          t2nc1,
          t2nc5,
          t2nc6,
          t2nc2,
          t2nc7,
          se = c(list(t2nc3_se),
                 list(t2nc4_se),
                 list(t2nc1_se),
                 list(t2nc5_se),
                 list(t2nc6_se),
                 list(t2nc2_se),
                 list(t2nc7_se)),
          title = "Intensity of WFH and Manager-Assigned Performance Ratings \\\\ (No Controls)",
          style = "qje",
          no.space = TRUE,
          dep.var.labels = HEADERS_TABLE_2,
          type = ifelse(TABLE_OUTPUT_TEX, "latex", "html"),
          order = c("wfh_categoryIntermediate WFH", "wfh_categoryLow WFH", "Constant"),
          keep = c("wfh_categoryIntermediate WFH", "wfh_categoryLow WFH", "Constant"),
          covariate.labels = c("Intermediate WFH", "Low WFH", "Constant"),
          header = FALSE,
          omit.stat = c("f", "adj.rsq", "ser"),
          add.lines=append(list(c("Controls", rep("No", 7))), SUMMARY_ROW_TABLE_2),
          label = tabname(OUTPUT_TABLE_2_NC_PATH),
          out = OUTPUT_TABLE_2_NC_PATH)
sink()

rm(OUTPUT_TABLE_2_NC_PATH)
rm(t2nc1, t2nc2, t2nc3, t2nc4, t2nc5, t2nc6, t2nc7)
rm(t2nc1_se, t2nc2_se, t2nc3_se, t2nc4_se, t2nc5_se, t2nc6_se, t2nc7_se)
message("... done")

message(paste0("Generating Table 2 with interactions but without controls (Table F.4) -> ", OUTPUT_TABLE_2_NC_INTERACT_PATH))

t2nc1i <- lm(perform_creativity ~ wfh_category*is_manager, df_omnibus)
t2nc1i_se <- sqrt(diag(vcovHC(t2nc1i, type=SE_TYPE)))
t2nc2i <- lm(perform_quality ~ wfh_category*is_manager, df_omnibus)
t2nc2i_se <- sqrt(diag(vcovHC(t2nc2i, type=SE_TYPE)))
t2nc3i <- lm(perform_ability ~ wfh_category*is_manager, df_omnibus)
t2nc3i_se <- sqrt(diag(vcovHC(t2nc3i, type=SE_TYPE)))
t2nc4i <- lm(perform_coop ~ wfh_category*is_manager, df_omnibus)
t2nc4i_se <- sqrt(diag(vcovHC(t2nc4i, type=SE_TYPE)))
t2nc5i <- lm(perform_knowledge ~ wfh_category*is_manager, df_omnibus)
t2nc5i_se <- sqrt(diag(vcovHC(t2nc5i, type=SE_TYPE)))
t2nc6i <- lm(perform_productivity ~ wfh_category*is_manager, df_omnibus)
t2nc6i_se <- sqrt(diag(vcovHC(t2nc6i, type=SE_TYPE)))
t2nc7i <- lm(perform_avg ~ wfh_category*is_manager, df_omnibus)
t2nc7i_se <- sqrt(diag(vcovHC(t2nc7i, type=SE_TYPE)))

sink(nullfile())
stargazer(t2nc3i,
          t2nc4i,
          t2nc1i,
          t2nc5i,
          t2nc6i,
          t2nc2i,
          t2nc7i,
          se = c(list(t2nc3i_se),
                 list(t2nc4i_se),
                 list(t2nc1i_se),
                 list(t2nc5i_se),
                 list(t2nc6i_se),
                 list(t2nc2i_se),
                 list(t2nc7i_se)),
          title = "Intensity of WFH and Manager-Assigned Performance Ratings \\\\ (No Controls)",
          style = "qje",
          no.space = TRUE,
          dep.var.labels = HEADERS_TABLE_2,
          type = ifelse(TABLE_OUTPUT_TEX, "latex", "html"),
          order = c("wfh_categoryIntermediate WFH", "wfh_categoryIntermediate WFH:is_manager", "wfh_categoryLow WFH", "wfh_categoryLow WFH:is_manager", "is_manager", "Constant"),
          keep = c("wfh_categoryIntermediate WFH", "wfh_categoryIntermediate WFH:is_manager", "wfh_categoryLow WFH", "wfh_categoryLow WFH:is_manager", "is_manager", "Constant"),
          covariate.labels = c("Intermediate WFH", "$\\times$ Is Manager", "Low WFH", "$\\times$ Is Manager", "Is Manager", "Constant"),
          header = FALSE,
          omit.stat = c("f", "adj.rsq", "ser"),
          add.lines=append(list(c("Other Controls", rep("No", 7))), SUMMARY_ROW_TABLE_2),
          label = tabname(OUTPUT_TABLE_2_NC_INTERACT_PATH),
          out = OUTPUT_TABLE_2_NC_INTERACT_PATH)
sink()

rm(OUTPUT_TABLE_2_NC_INTERACT_PATH)
rm(t2nc1i, t2nc2i, t2nc3i, t2nc4i, t2nc5i, t2nc6i, t2nc7i)
rm(t2nc1i_se, t2nc2i_se, t2nc3i_se, t2nc4i_se, t2nc5i_se, t2nc6i_se, t2nc7i_se)
message("... done")

message(paste0("Generating Table 2 as ordered probit (Table E.3) -> ", OUTPUT_TABLE_2_OP_PATH))

t2opc1 <- polr(get_reg_formula_with_controls("perform_creativity", as_factor = TRUE, interaction = FALSE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc2 <- polr(get_reg_formula_with_controls("perform_quality", as_factor = TRUE, interaction = FALSE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc3 <- polr(get_reg_formula_with_controls("perform_ability", as_factor = TRUE, interaction = FALSE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc4 <- polr(get_reg_formula_with_controls("perform_coop", as_factor = TRUE, interaction = FALSE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc5 <- polr(get_reg_formula_with_controls("perform_knowledge", as_factor = TRUE, interaction = FALSE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc6 <- polr(get_reg_formula_with_controls("perform_productivity", as_factor = TRUE, interaction = FALSE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc7 <- polr(get_reg_formula_with_controls("perform_avg", as_factor = TRUE, interaction = FALSE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc1_se <- sqrt(diag(vcovCL(t2opc1)))
t2opc2_se <- sqrt(diag(vcovCL(t2opc2)))
t2opc3_se <- sqrt(diag(vcovCL(t2opc3)))
t2opc4_se <- sqrt(diag(vcovCL(t2opc4)))
t2opc5_se <- sqrt(diag(vcovCL(t2opc5)))
t2opc6_se <- sqrt(diag(vcovCL(t2opc6)))
t2opc7_se <- sqrt(diag(vcovCL(t2opc7)))

sink(nullfile())
stargazer(t2opc3,
          t2opc4,
          t2opc1,
          t2opc5,
          t2opc6,
          t2opc2,
          t2opc7,
          se = c(list(t2opc3_se),
                 list(t2opc4_se),
                 list(t2opc1_se),
                 list(t2opc5_se),
                 list(t2opc6_se),
                 list(t2opc2_se),
                 list(t2opc7_se)),
          dep.var.labels = HEADERS_TABLE_2,
          title = "Intensity of WFH and Manager-Assigned Ratings (Ordered Probit)",
          style = "qje",
          dep.var.caption = "",
          type = ifelse(TABLE_OUTPUT_TEX, "latex", "html"),
          order = noconst(NO_INTERACTION_COVARIATE_KEEP),
          keep = noconst(NO_INTERACTION_COVARIATE_KEEP),
          covariate.labels =noconst(NO_INTERACTION_COVARIATE_LABELS),
          header = FALSE,
          model.names = FALSE,
          no.space = TRUE,
          omit.stat = c("f", "adj.rsq", "ser"),
          add.lines = SUMMARY_ROW_TABLE_2,
          label = tabname(OUTPUT_TABLE_2_OP_PATH),
          out = OUTPUT_TABLE_2_OP_PATH)
sink()

rm(OUTPUT_TABLE_2_OP_PATH)
rm(t2opc1, t2opc2, t2opc3, t2opc4, t2opc5, t2opc6, t2opc7)
rm(t2opc1_se, t2opc2_se, t2opc3_se, t2opc4_se, t2opc5_se, t2opc6_se, t2opc7_se)
message("... done")

message(paste0("Generating Table 2 with interactions as ordered probit (Table E.4) -> ", OUTPUT_TABLE_2_OP_INTERACT_PATH))

t2opc1i <- polr(get_reg_formula_with_controls("perform_creativity", as_factor = TRUE, interaction = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc2i <- polr(get_reg_formula_with_controls("perform_quality", as_factor = TRUE, interaction = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc3i <- polr(get_reg_formula_with_controls("perform_ability", as_factor = TRUE, interaction = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc4i <- polr(get_reg_formula_with_controls("perform_coop", as_factor = TRUE, interaction = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc5i <- polr(get_reg_formula_with_controls("perform_knowledge", as_factor = TRUE, interaction = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc6i <- polr(get_reg_formula_with_controls("perform_productivity", as_factor = TRUE, interaction = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc7i <- polr(get_reg_formula_with_controls("perform_avg", as_factor = TRUE, interaction = TRUE), df_omnibus, method = "probit", Hess = TRUE, model = TRUE)
t2opc1i_se <- sqrt(diag(vcovCL(t2opc1i)))
t2opc2i_se <- sqrt(diag(vcovCL(t2opc2i)))
t2opc3i_se <- sqrt(diag(vcovCL(t2opc3i)))
t2opc4i_se <- sqrt(diag(vcovCL(t2opc4i)))
t2opc5i_se <- sqrt(diag(vcovCL(t2opc5i)))
t2opc6i_se <- sqrt(diag(vcovCL(t2opc6i)))
t2opc7i_se <- sqrt(diag(vcovCL(t2opc7i)))

sink(nullfile())
stargazer(t2opc3i,
          t2opc4i,
          t2opc1i,
          t2opc5i,
          t2opc6i,
          t2opc2i,
          t2opc7i,
          se = c(list(t2opc3i_se),
                 list(t2opc4i_se),
                 list(t2opc1i_se),
                 list(t2opc5i_se),
                 list(t2opc6i_se),
                 list(t2opc2i_se),
                 list(t2opc7i_se)),
          dep.var.labels = HEADERS_TABLE_2,
          title = "Intensity of WFH and Manager-Assigned Ratings (Ordered Probit)",
          style = "qje",
          dep.var.caption = "",
          type = ifelse(TABLE_OUTPUT_TEX, "latex", "html"),
          order = noconst(INTERACTION_COVARIATE_KEEP),
          keep = noconst(INTERACTION_COVARIATE_KEEP),
          covariate.labels =noconst(INTERACTION_COVARIATE_LABELS),
          header = FALSE,
          model.names = FALSE,
          no.space = TRUE,
          omit.stat = c("f", "adj.rsq", "ser"),
          add.lines = SUMMARY_ROW_TABLE_2,
          label = tabname(OUTPUT_TABLE_2_OP_INTERACT_PATH),
          out = OUTPUT_TABLE_2_OP_INTERACT_PATH)
sink()

rm(OUTPUT_TABLE_2_OP_INTERACT_PATH)
rm(t2opc1i, t2opc2i, t2opc3i, t2opc4i, t2opc5i, t2opc6i, t2opc7i)
rm(t2opc1i_se, t2opc2i_se, t2opc3i_se, t2opc4i_se, t2opc5i_se, t2opc6i_se, t2opc7i_se)
message("... done")

message(paste0("Generating Figure 1B -> ", OUTPUT_FIGURE_1B_PATH))

f1b_median <- median(df_omnibus$days_in_office)
f1b_mean <- mean(df_omnibus$days_in_office)
f1b_subtitle <- sprintf("Median (Mean) Days in Office = %0.0f (%0.0f)", f1b_median, f1b_mean)

ggplot(df_omnibus, aes(x = days_in_office)) +
  geom_density(kernel = "gaussian") +
  xlab("Days in the Office Distribution") +
  ylab("Density") +
  labs(caption = f1b_subtitle)


ggsave(OUTPUT_FIGURE_1B_PATH, width = 6, height = 4, units = "in")
rm(f1b_mean, f1b_median, f1b_subtitle, OUTPUT_FIGURE_1B_PATH)
message("... done")


rm(df_Overall_Summary_Stat)
rm(SUMMARY_ROW_TABLE_1, SUMMARY_ROW_TABLE_2)
rm(HEADERS_TABLE_1, HEADERS_TABLE_2)
rm(INTERACTION_COVARIATE_KEEP, INTERACTION_COVARIATE_LABELS)
rm(NO_INTERACTION_COVARIATE_KEEP, NO_INTERACTION_COVARIATE_LABELS)
rm(extract_coeff, noconst, tabname)
rm(get_reg_formula_with_controls)

message("sessionInfo() results:")
print(sessionInfo())